In [8]:
# pip install --upgrade tensorflow tensorflow-io
# pip install -q -U keras-nlp
# pip install -q -U "keras>=3"   # quoted so the shell does not treat >= as output redirection
In [7]:
# General Imports
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import os

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Building Model
from keras.utils import plot_model
from tensorflow.keras import models
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import legacy

# Training Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint

# Data Processing
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.preprocessing.image import load_img
2024-04-29 05:25:15.820198: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 05:25:15.820332: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 05:25:15.967266: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
In [9]:
# Global variables
SAVE = False  # NOTE(review): defined but not referenced in any visible cell
SEED = 111    # single seed reused for every RNG below

# Setting seed for consistent results
# (per Keras docs, set_random_seed seeds Python, NumPy and TF together,
# so the two lines after it are redundant but harmless)
tf.keras.utils.set_random_seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Data Visualization updates
%config InlineBackend.figure_format = 'retina'
plt.rcParams["figure.figsize"] = (16, 10)
plt.rcParams.update({'font.size': 14})

# Data Classifications
CLASS_TYPES = ['pituitary', 'notumor', 'meningioma', 'glioma']  # MRI class folder names
N_TYPES = len(CLASS_TYPES)

Importing Data¶

In [10]:
# Function for importing data
def get_data_labels(directory, shuffle=True, random_state=0):
    """
    Walk a directory whose immediate sub-folders are class labels and
    collect parallel lists of image paths and labels.

    Parameters
    ----------
    directory : str
        Root directory containing one sub-folder per class.
    shuffle : bool, default True
        Whether to shuffle the returned paths/labels in unison.
    random_state : int, default 0
        Seed for the shuffle, for reproducible ordering.

    Returns
    -------
    (list[str], list[str])
        Image paths and their corresponding class labels.
    """
    import os

    # Lists to store data and labels
    data_path = []
    data_labels = []

    for label in os.listdir(directory):
        label_dir = os.path.join(directory, label)

        # Skip non-directories (e.g. macOS .DS_Store entries)
        if not os.path.isdir(label_dir):
            continue

        # Record the path of every image inside this class folder
        for image in os.listdir(label_dir):
            data_path.append(os.path.join(label_dir, image))
            data_labels.append(label)

    # BUG FIX: the original did `from sklearn.utils import shuffle` here,
    # which shadowed the `shuffle` parameter — `if shuffle:` then tested the
    # imported function (always truthy), so shuffle=False was silently
    # ignored. A seeded NumPy permutation honors the flag and matches
    # sklearn.utils.shuffle's RandomState-permutation behavior.
    if shuffle:
        perm = np.random.RandomState(random_state).permutation(len(data_path))
        data_path = [data_path[i] for i in perm]
        data_labels = [data_labels[i] for i in perm]

    return data_path, data_labels
In [11]:
# Setting up file paths for training and testing (Kaggle dataset layout)
USER_PATH = r"/kaggle/input/brain-tumor-mri-dataset"
train_dir = USER_PATH + r'/Training/'
test_dir = USER_PATH + r'/Testing/'

# Getting data using above function
train_paths, train_labels = get_data_labels(train_dir)
test_paths, test_labels = get_data_labels(test_dir)

# Printing training and testing sample sizes
print('Training')
print(f'Number of Paths: {len(train_paths)}')
print(f'Number of Labels: {len(train_labels)}')
print('\nTesting')
print(f'Number of Paths: {len(test_paths)}')
print(f'Number of Labels: {len(test_labels)}')
Training
Number of Paths: 5712
Number of Labels: 5712

Testing
Number of Paths: 1311
Number of Labels: 1311
In [12]:
def _plot_class_pie(ax, counts, labels, colors, title, explode=None, startangle=0):
    """Draw one labelled pie chart showing percentage + absolute count per slice.

    Extracted because the original cell copy-pasted the same pie-chart
    code three times with different data.
    """
    ax.set_title(title)
    ax.pie(
        counts,
        labels=labels,
        colors=colors,
        # Show both the percentage and the absolute count on each slice
        autopct=lambda p: '{:.2f}%\n{:,.0f}'.format(p, p * sum(counts) / 100),
        explode=explode,
        startangle=startangle,
        textprops={'fontsize': 20}
    )


_, ax = plt.subplots(ncols=3, figsize=(20, 14))

# Shared styling for the two class-distribution pies
class_colors = ['#FAC500', '#0BFA00', '#0066FA', '#FA0000']
class_names = [label.title() for label in CLASS_TYPES]

# Plotting training data types
train_counts = [len([x for x in train_labels if x == label]) for label in CLASS_TYPES]
print('Training Counts')
print(dict(zip(CLASS_TYPES, train_counts)))
_plot_class_pie(ax[0], train_counts, class_names, class_colors,
                'Training Data', explode=tuple(0.01 for _ in range(N_TYPES)))

# Plotting distribution of train test split
_plot_class_pie(ax[1], [len(train_labels), len(test_labels)],
                ['Train', 'Test'], ['darkcyan', 'orange'],
                'Train Test Split', explode=(0.1, 0), startangle=85)

# Plotting testing data types
test_counts = [len([x for x in test_labels if x == label]) for label in CLASS_TYPES]
print('\nTesting Counts')
print(dict(zip(CLASS_TYPES, test_counts)))
_plot_class_pie(ax[2], test_counts, class_names, class_colors,
                'Testing Data', explode=tuple(0.01 for _ in range(N_TYPES)))

plt.show()
Training Counts
{'pituitary': 1457, 'notumor': 1595, 'meningioma': 1339, 'glioma': 1321}

Testing Counts
{'pituitary': 300, 'notumor': 405, 'meningioma': 306, 'glioma': 300}
In [13]:
# Load a single training image to sanity-check the preprocessing pipeline
im = load_img(train_paths[3], target_size=(150, 150))
im = img_to_array(im)

# Reshape it to (1, 150, 150, 3) — add a leading batch dimension
im = np.expand_dims(im, axis=0)
print(f'x reshaped: {im.shape}')

# Normalize the tensor to [0, 1] by its own max value
# NOTE(review): training uses rescale=1./255 instead; the two agree only
# when the image contains a 255-valued pixel
im /= np.max(im) # ~ np.max(img_tensor)

# Convert the array back to the image format for display
im = array_to_img(im[0])
display(im)
x reshaped: (1, 150, 150, 3)
In [14]:
# Function to display a list of images based on the given index
def show_images(paths, label_paths, index_list=range(10), im_size=250, figsize=(12, 8), save=False):
    """
    Display a grid (4 images per row) of images selected by index.

    Parameters
    ----------
    paths : list[str]
        All image paths; `index_list` indexes into this.
    label_paths : list[str]
        Labels parallel to `paths`, used for subplot titles.
    index_list : iterable[int], default range(10)
        Indices of the images to show.
    im_size : int, default 250
        Square size (pixels) each image is loaded at.
    figsize : tuple, default (12, 8)
        Figure size passed to matplotlib.
    save : bool, default False
        If True, save the figure to 'show_image.pdf' instead of showing it.
    """
    num_images = len(index_list)
    num_rows = (num_images + 3) // 4  # 4 images per row, rounded up

    _, ax = plt.subplots(nrows=num_rows, ncols=4, figsize=figsize)
    ax = ax.flatten()

    # FIX: removed the dead `if i >= num_images: break` guard —
    # enumerate(index_list) can never yield more than num_images items.
    for i, index in enumerate(index_list):
        image = load_img(paths[index], target_size=(im_size, im_size))
        ax[i].imshow(image)
        ax[i].set_title(f'{index}: {label_paths[index]}')
        ax[i].axis('off')

    # FIX: hide leftover axes when num_images is not a multiple of 4
    # (the original left empty frames visible in the last row).
    for j in range(num_images, len(ax)):
        ax[j].axis('off')

    plt.tight_layout()

    if save:
        plt.savefig('show_image.pdf')
    else:
        plt.show()
In [15]:
# Four different data classification images, from three different angles (images are independent)
show_images(train_paths, train_labels, im_size=350, figsize=(13,10),
            index_list=[0, 94, 235, 17,
                        61, 324, 55, 45,
                        374, 65, 391, 488])

Data Processing & Training Setup Values¶

In [16]:
# Image size (height, width) fed to both the generators and the model
image_size = (150, 150)

# Training batch size
batch_size = 128
In [17]:
# Data augmentation and preprocessing for the training set.
# NOTE(review): zoom_range=0 disables zooming entirely; width/height shift of
# 0.002 is a very small (0.2%) translation.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=10,
                                   brightness_range=(0.85, 1.15),
                                   width_shift_range=0.002,
                                   height_shift_range=0.002,
                                   shear_range=12.5,
                                   zoom_range=0,
                                   horizontal_flip=True,
                                   vertical_flip=False,
                                   fill_mode="nearest")


# applying the generator to training data with constant seed
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=image_size,
                                                    batch_size=batch_size,
                                                    class_mode="categorical",
                                                    seed=SEED)

# No augmentation of the test data, just rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# applying the generator to testing data with constant seed;
# shuffle=False keeps generator order aligned with .classes / .filepaths,
# which the evaluation cells below rely on
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=image_size,
                                                  batch_size=batch_size,
                                                  class_mode="categorical",
                                                  shuffle=False,
                                                  seed=SEED)
Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.

Data Augmentation Class Indices¶

In [18]:
# Accessing class indices for training data generator
class_indices_train = train_generator.class_indices
# Ordered list of class names; the printed mapping below shows the
# generator assigns indices alphabetically (glioma=0 ... pituitary=3)
class_indices_train_list = list(train_generator.class_indices.keys())


# Displaying categorical types
print("Categorical types for the training data:")
print(class_indices_train)
Categorical types for the training data:
{'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}

Training Setup Values¶

In [19]:
# Image shape: height, width, RGB channels
image_shape = (image_size[0], image_size[1], 3)

# Training epochs
epochs = 40

# Steps per epoch (floor division drops the final partial batch)
steps_per_epoch = train_generator.samples // batch_size

# Validation steps (likewise the floor of samples / batch_size)
validation_steps = test_generator.samples // batch_size

print(f'Image shape: {image_shape}')
print(f'Epochs: {epochs}')
print(f'Batch size: {batch_size}')
print(f'Steps Per Epoch: {steps_per_epoch}')
print(f'Validation steps: {validation_steps}')
Image shape: (150, 150, 3)
Epochs: 40
Batch size: 128
Steps Per Epoch: 44
Validation steps: 10
In [20]:
def plot_sample_predictions(model, test_generator, categories, test_dir, num_samples=9, figsize=(12, 8)):
    """
    Plot randomly sampled test images with predicted vs. true labels.

    Parameters
    ----------
    model : keras model
        Trained classifier; predictions are taken over the whole generator.
    test_generator : directory iterator
        Must be unshuffled so .classes/.filepaths align with predictions.
    categories : list[str]
        Class names indexed by the model's output index.
    test_dir : str
        Unused; kept for backward compatibility with existing callers.
    num_samples : int, default 9
        Number of images to sample (without replacement).
    figsize : tuple, default (12, 8)
        Figure size passed to matplotlib.
    """
    # Make predictions on the test dataset (rows align with generator order)
    predictions = model.predict(test_generator)
    predicted_categories = np.argmax(predictions, axis=1)
    true_categories = test_generator.classes

    # Randomly sample test images
    test_images = np.array(test_generator.filepaths)
    sample_indices = np.random.choice(len(test_images), size=num_samples, replace=False)
    sample_images = test_images[sample_indices]
    sample_predictions = [categories[predicted_categories[i]] for i in sample_indices]
    sample_true_labels = [categories[true_categories[i]] for i in sample_indices]

    # FIX: the original hard-coded a 3x3 grid, which broke for
    # num_samples > 9; size the grid from num_samples instead
    # (still 3x3 for the default of 9).
    num_cols = int(np.ceil(np.sqrt(num_samples)))
    num_rows = int(np.ceil(num_samples / num_cols))

    # Plot sample images with their predicted and true labels
    plt.figure(figsize=figsize)

    for i, image_path in enumerate(sample_images):
        plt.subplot(num_rows, num_cols, i + 1)
        img = plt.imread(image_path)
        plt.imshow(img)
        plt.axis("off")

        # Green title for a correct prediction, red otherwise
        prediction_color = 'green' if sample_predictions[i] == sample_true_labels[i] else 'red'
        plt.title(f"Predicted: {sample_predictions[i]}\nTrue: {sample_true_labels[i]}", color=prediction_color)

    plt.tight_layout()
    plt.show()

    
def CM(CNN_model, test_generator, categories):
    """
    Compute the confusion matrix for a CNN evaluated on a test generator.

    The generator must be unshuffled so that `.classes` lines up with the
    prediction order. `categories` is accepted for interface consistency.

    Returns the confusion matrix as a 2-D integer array.
    """
    from sklearn.metrics import confusion_matrix

    # Per-sample class scores -> hard class index via argmax
    class_scores = CNN_model.predict(test_generator)
    y_pred = np.argmax(class_scores, axis=1)

    # Ground-truth indices come straight from the generator
    y_true = test_generator.classes

    return confusion_matrix(y_true, y_pred)


def calculate_metrics(confusion_matrix, categories):
    """
    Print per-class precision, recall and F1, plus overall accuracy,
    from a multi-class confusion matrix (rows = true, cols = predicted).
    """
    true_positives = np.diag(confusion_matrix)

    # Column sums = predicted counts per class; row sums = actual counts
    precision = true_positives / np.sum(confusion_matrix, axis=0)
    recall = true_positives / np.sum(confusion_matrix, axis=1)
    f1_score = 2 * (precision * recall) / (precision + recall)
    accuracy = np.sum(true_positives) / np.sum(confusion_matrix)

    # Per-class report
    for category, p, r, f in zip(categories, precision, recall, f1_score):
        print(f"Class: {category.title()}")
        print(f"Precision: {p:.3f}")
        print(f"Recall: {r:.3f}")
        print(f"F1-Score: {f:.3f}\n")

    # Overall accuracy across all classes
    print(f"\nAccuracy: {accuracy:.3f}")
In [21]:
# NOTE(review): this cell builds everything through `layers.*`, so the
# individual layer imports at the top of the notebook go unused here.
from tensorflow.keras import layers
model = models.Sequential([
    # Convolutional layer 1
    layers.Conv2D(32, (4, 4), activation="relu", input_shape=image_shape),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Convolutional layer 2
    layers.Conv2D(64, (4, 4), activation="relu"),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Convolutional layer 3
    layers.Conv2D(128, (4, 4), activation="relu"),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Convolutional layer 4 (4x4 feature map -> 1x1 output, per the summary)
    layers.Conv2D(128, (4, 4), activation="relu"),
    layers.Flatten(),

    # Full connect layers
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.5, seed=SEED),

    # Add LSTM layer
    # NOTE(review): the Dense output is (None, 512), so Reshape((-1, 512))
    # yields a length-1 sequence — the summary shows (None, 1, 512) — and the
    # LSTM only ever processes a single timestep.
    layers.Reshape((-1, 512)),  # Reshape output from Dense layer to fit into LSTM
    layers.LSTM(128),  # LSTM layer with 128 units

    # 4-way softmax over the tumor classes
    layers.Dense(N_TYPES, activation="softmax")
])


model.summary()

# optimizer = legacy.Adam(learning_rate=0.001, beta_1=0.869, beta_2=0.995)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics= ['accuracy'])
/opt/conda/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py:99: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                 │ (None, 147, 147, 32)   │         1,568 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 49, 49, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 46, 46, 64)     │        32,832 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 15, 15, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 12, 12, 128)    │       131,200 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 4, 4, 128)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 1, 1, 128)      │       262,272 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 512)            │        66,048 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 512)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape)               │ (None, 1, 512)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm (LSTM)                     │ (None, 128)            │       328,192 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 4)              │           516 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 822,628 (3.14 MB)
 Trainable params: 822,628 (3.14 MB)
 Non-trainable params: 0 (0.00 B)

Training Model¶

In [22]:
# Stop training if loss doesn't keep decreasing.
# NOTE(review): EarlyStopping watches *training* loss while
# ReduceLROnPlateau watches validation loss — confirm this asymmetry is
# intentional.
model_es = EarlyStopping(monitor='loss', min_delta=1e-9, patience=8, verbose=True)
model_rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=5, verbose=True)

# Training the model
history = model.fit(train_generator,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs,
                    validation_data=test_generator,
                    validation_steps=validation_steps,
                    callbacks=[model_es, model_rlr])
Epoch 1/40
/opt/conda/lib/python3.10/site-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:120: UserWarning: Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.
  self._warn_if_super_not_called()
44/44 ━━━━━━━━━━━━━━━━━━━━ 93s 2s/step - accuracy: 0.3385 - loss: 1.2903 - val_accuracy: 0.5031 - val_loss: 1.3082 - learning_rate: 0.0010
Epoch 2/40
 1/44 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - accuracy: 0.5078 - loss: 1.0874
/opt/conda/lib/python3.10/contextlib.py:153: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.
  self.gen.throw(typ, value, traceback)
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step - accuracy: 0.5078 - loss: 1.0874 - val_accuracy: 0.8710 - val_loss: 0.6508 - learning_rate: 0.0010
Epoch 3/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 51s 995ms/step - accuracy: 0.6834 - loss: 0.7441 - val_accuracy: 0.7250 - val_loss: 0.6610 - learning_rate: 0.0010
Epoch 4/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.6562 - loss: 0.6988 - val_accuracy: 0.9677 - val_loss: 0.2476 - learning_rate: 0.0010
Epoch 5/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 50s 975ms/step - accuracy: 0.7766 - loss: 0.5374 - val_accuracy: 0.7336 - val_loss: 0.5844 - learning_rate: 0.0010
Epoch 6/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.7656 - loss: 0.5322 - val_accuracy: 0.9677 - val_loss: 0.2224 - learning_rate: 0.0010
Epoch 7/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 942ms/step - accuracy: 0.8026 - loss: 0.4894 - val_accuracy: 0.7703 - val_loss: 0.5744 - learning_rate: 0.0010
Epoch 8/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8203 - loss: 0.4316 - val_accuracy: 0.8710 - val_loss: 0.3662 - learning_rate: 0.0010
Epoch 9/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 955ms/step - accuracy: 0.8259 - loss: 0.4136 - val_accuracy: 0.8133 - val_loss: 0.4442 - learning_rate: 0.0010
Epoch 10/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9062 - loss: 0.3353 - val_accuracy: 0.9355 - val_loss: 0.2296 - learning_rate: 0.0010
Epoch 11/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 874ms/step - accuracy: 0.8606 - loss: 0.3544
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 947ms/step - accuracy: 0.8605 - loss: 0.3546 - val_accuracy: 0.8031 - val_loss: 0.5161 - learning_rate: 0.0010
Epoch 12/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9297 - loss: 0.2394 - val_accuracy: 0.9355 - val_loss: 0.1608 - learning_rate: 3.0000e-04
Epoch 13/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 948ms/step - accuracy: 0.8840 - loss: 0.2952 - val_accuracy: 0.8547 - val_loss: 0.3791 - learning_rate: 3.0000e-04
Epoch 14/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9531 - loss: 0.1522 - val_accuracy: 0.9355 - val_loss: 0.1817 - learning_rate: 3.0000e-04
Epoch 15/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 952ms/step - accuracy: 0.9044 - loss: 0.2635 - val_accuracy: 0.8570 - val_loss: 0.3933 - learning_rate: 3.0000e-04
Epoch 16/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8906 - loss: 0.2517 - val_accuracy: 0.9032 - val_loss: 0.2781 - learning_rate: 3.0000e-04
Epoch 17/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 869ms/step - accuracy: 0.9191 - loss: 0.2215
Epoch 17: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
44/44 ━━━━━━━━━━━━━━━━━━━━ 49s 941ms/step - accuracy: 0.9191 - loss: 0.2215 - val_accuracy: 0.8383 - val_loss: 0.4635 - learning_rate: 3.0000e-04
Epoch 18/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9141 - loss: 0.2469 - val_accuracy: 0.8387 - val_loss: 0.6436 - learning_rate: 9.0000e-05
Epoch 19/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 48s 939ms/step - accuracy: 0.9295 - loss: 0.2010 - val_accuracy: 0.8773 - val_loss: 0.3540 - learning_rate: 9.0000e-05
Epoch 20/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9453 - loss: 0.1667 - val_accuracy: 0.9355 - val_loss: 0.3337 - learning_rate: 9.0000e-05
Epoch 21/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 48s 928ms/step - accuracy: 0.9319 - loss: 0.1942 - val_accuracy: 0.9000 - val_loss: 0.3029 - learning_rate: 9.0000e-05
Epoch 22/40
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 949us/step - accuracy: 0.9219 - loss: 0.2403 - val_accuracy: 0.9355 - val_loss: 0.1188 - learning_rate: 9.0000e-05
Epoch 22: early stopping

Model Evaluation¶

In [23]:
# Evaluating the model
# NOTE(review): steps = samples // batch_size (10) covers 1280 of the 1311
# test images; the final partial batch is dropped, so these numbers differ
# slightly from metrics computed over the full generator.
loss, accuracy = model.evaluate(test_generator, steps=test_generator.samples//batch_size)
print(f"Test Loss: {loss:0.5f}")
print(f"Test Accuracy: {accuracy:0.5f}")
10/10 ━━━━━━━━━━━━━━━━━━━━ 3s 315ms/step - accuracy: 0.8490 - loss: 0.4242
Test Loss: 0.30677
Test Accuracy: 0.89688
In [24]:
_, ax = plt.subplots(ncols=2, figsize=(15, 6))

# Plot the training and validation accuracy over epochs
ax[0].plot(history.history['accuracy'])
ax[0].plot(history.history['val_accuracy'])
ax[0].set_title('Model 2 Accuracy')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend(['Train', 'Validation'])
ax[0].grid(alpha=0.2)

# Plot the training and validation loss over epochs
ax[1].plot(history.history['loss'])
ax[1].plot(history.history['val_loss'])
ax[1].set_title('Model 2 Loss')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Loss')
ax[1].legend(['Train', 'Validation'])
ax[1].grid(alpha=0.2)

plt.show()
In [25]:
# Compute and plot the confusion matrix for the test set
confusion_matrix = CM(CNN_model=model, test_generator=test_generator, categories=class_indices_train_list)

# Shared tick positions/labels, centered on the heatmap cells
tick_positions = np.arange(N_TYPES) + 0.5
tick_labels = [name.title() for name in class_indices_train_list]

plt.figure(figsize=(8,8))
sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.xticks(ticks=tick_positions, labels=tick_labels, ha='center')
plt.yticks(ticks=tick_positions, labels=tick_labels, va='center')
plt.show()
11/11 ━━━━━━━━━━━━━━━━━━━━ 4s 312ms/step
In [26]:
# Showing per-class precision/recall/F1 and overall accuracy
calculate_metrics(confusion_matrix, categories=class_indices_train_list)
Class: Glioma
Precision: 0.955
Recall: 0.847
F1-Score: 0.898

Class: Meningioma
Precision: 0.851
Recall: 0.745
F1-Score: 0.794

Class: Notumor
Precision: 0.854
Recall: 0.993
F1-Score: 0.918

Class: Pituitary
Precision: 0.958
Recall: 0.977
F1-Score: 0.967


Accuracy: 0.898
In [27]:
# Visual spot-check: nine random test images with predicted vs. true labels
plot_sample_predictions(model=model, 
                        test_generator=test_generator, 
                        categories=class_indices_train_list,
                        test_dir=test_dir, 
                        num_samples=9,
                        figsize=(13, 12))
11/11 ━━━━━━━━━━━━━━━━━━━━ 3s 306ms/step
In [28]:
# Persist the trained model in the native Keras format
model.save('my_model.keras')
In [29]:
# NOTE(review): load_img/img_to_array were already imported at the top of the
# notebook; this re-import is redundant but harmless.
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Single external MRI image used for a one-off prediction
image_path = "/kaggle/input/brain-tumor-classification-mri-images/brain_tumor_mri/new_dataset/bt_images/1.jpg"
# Load the image at the model's expected input size
image = load_img(image_path, target_size=(150, 150))


# Convert the image to an array
image = img_to_array(image)

# Reshape the image to (1, 150, 150, 3) — add the batch dimension
image = np.expand_dims(image, axis=0)

# Normalize the image by its own max value
# NOTE(review): training rescaled by 1/255; dividing by the image max matches
# only when a 255-valued pixel is present — confirm this is intended
image /= np.max(image)

# Make a prediction
prediction = model.predict(image)

# Get the predicted class

predicted_class = np.argmax(prediction)
# Print the predicted class
# This mapping mirrors the train_generator.class_indices printed earlier:
# {'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}
class_labels = {0:'glioma', 1:'meningioma' ,  2:'notumor', 3:'pituitary'}
outcome = class_labels[predicted_class]
print(f"Predicted class: {outcome}")
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 309ms/step
Predicted class: glioma

Gemma for Text Generation¶

In [30]:
# Keras 3 + KerasNLP for the Gemma text-generation section
import keras
import keras_nlp
In [31]:
import os

# NOTE(review): per the Keras docs, KERAS_BACKEND must be set *before*
# `import keras` to take effect — keras was already imported in the previous
# cell, so this assignment does not switch the backend for this session.
os.environ["KERAS_BACKEND"] = "jax"  # Or "tensorflow" or "torch".
In [32]:
# Load the pretrained 2B-parameter Gemma causal LM from the Kaggle preset
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
gemma_lm.summary()
Attaching 'config.json' from model 'keras/gemma/keras/gemma_2b_en/2' to your Kaggle notebook...
Attaching 'config.json' from model 'keras/gemma/keras/gemma_2b_en/2' to your Kaggle notebook...
Attaching 'model.weights.h5' from model 'keras/gemma/keras/gemma_2b_en/2' to your Kaggle notebook...
Attaching 'tokenizer.json' from model 'keras/gemma/keras/gemma_2b_en/2' to your Kaggle notebook...
Attaching 'assets/tokenizer/vocabulary.spm' from model 'keras/gemma/keras/gemma_2b_en/2' to your Kaggle notebook...
normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
Preprocessor: "gemma_causal_lm_preprocessor"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Tokenizer (type)                                   ┃                                             Vocab # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ gemma_tokenizer (GemmaTokenizer)                   │                                             256,000 │
└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘
Model: "gemma_causal_lm"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                  ┃ Output Shape              ┃         Param # ┃ Connected to               ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ padding_mask (InputLayer)     │ (None, None)              │               0 │ -                          │
├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤
│ token_ids (InputLayer)        │ (None, None)              │               0 │ -                          │
├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤
│ gemma_backbone                │ (None, None, 2048)        │   2,506,172,416 │ padding_mask[0][0],        │
│ (GemmaBackbone)               │                           │                 │ token_ids[0][0]            │
├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤
│ token_embedding               │ (None, None, 256000)      │     524,288,000 │ gemma_backbone[0][0]       │
│ (ReversibleEmbedding)         │                           │                 │                            │
└───────────────────────────────┴───────────────────────────┴─────────────────┴────────────────────────────┘
 Total params: 2,506,172,416 (9.34 GB)
 Trainable params: 2,506,172,416 (9.34 GB)
 Non-trainable params: 0 (0.00 B)
In [33]:
# Generate a description and a symptom list for the predicted tumor type;
# the two completions are concatenated into one string
description = gemma_lm.generate(f"What is {outcome}?", max_length=256)
description += gemma_lm.generate(f"What are the symptoms of {outcome}?", max_length=256)
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1714369044.158714     145 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1714369044.238247     145 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
In [34]:
print(description)
What is glioma?

Glioma is a type of brain tumor that arises from the glial cells, which are the supportive cells of the brain and spinal cord. Gliomas are the most common type of primary brain tumor.

Glioma is a broad term that refers to a group of tumors that arise from the glial cells. There are several types of glioma, including:

* astrocytoma
* oligodendroglioma
* ependymoma
* pilocytic astrocytoma
* diffuse astrocytoma
* anaplastic astrocytoma
* oligodendroglioma
* ependymoma
* pilocytic astrocytoma
* diffuse astrocytoma
* anaplastic astrocytoma
* oligodendroglioma
* ependymoma
* pilocytic astrocytoma
* diffuse astrocytoma
* anaplastic astrocytoma

What are the symptoms of glioma?

Symptoms of glioma vary depending on the type of glioma.

Symptoms of glioma may include:

* headache
* seizures
* weakness
* vision problems
* difficulty speaking
* difficulty swallowing
* difficulty walking
* difficulty concentrating
* memory problems
* loss of balance
* loss ofWhat are the symptoms of glioma?

Glioma is a type of brain tumor that is characterized by abnormal cell growth. It is the most common type of primary brain tumor.

Symptoms of glioma may include:

* headache
* seizures
* vision problems
* weakness or numbness in the arms or legs
* difficulty speaking or understanding speech
* difficulty swallowing
* difficulty walking
* memory loss
* confusion
* dizziness
* nausea
* vomiting
* seizures
* loss of consciousness

What are the causes of glioma?

The exact cause of glioma is unknown. However, there are several risk factors that may increase the likelihood of developing glioma. These include:

* <strong>Age:</strong> Glioma is more common in adults over the age of 50.
* <strong>Gender:</strong> Glioma is more common in men than in women.
* <strong>Family history:</strong> If a family member has had a glioma, you are more likely to develop one.
* <strong>Race:</strong> Glioma is more common in Caucasians than in other races.
* <strong>Radiation exposure:</strong> Exposure to radiation, such as from a nuclear accident or medical treatment, may increase the risk
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: